# Packages this script depends on. The vector is only used by the
# (commented-out) install call below; attaching happens via library().
packages <- c(
  "sf", # for working with spatial data
  "spData", # to easily get world boundaries
  "countrycode", # to convert between iso2 and iso3 country codes
  "flowmapblue", # for interactive flow mapping
  "flowmapper", # for static flow mapping using ggplot2
  "tidyverse" # attached below; provides read_csv(), dplyr verbs and ggplot2
)
# Uncomment to install everything in one go:
# install.packages(packages)
# Workshop EDSD
# Attach the packages used below. The console output captured when the
# script was rendered is kept as `#>` comments so that the file parses as R.
library(sf)
#> Warning: package 'sf' was built under R version 4.4.3
#> Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(spData)
#> Warning: package 'spData' was built under R version 4.4.3
#> To access larger datasets in this package, install the spDataLarge
#> package with: `install.packages('spDataLarge',
#> repos='https://nowosad.github.io/drat/', type='source')`
library(countrycode)
#> Warning: package 'countrycode' was built under R version 4.4.3
library(flowmapblue)
#> Warning: package 'flowmapblue' was built under R version 4.4.3
library(flowmapper)
#> Warning: package 'flowmapper' was built under R version 4.4.3
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.4.3
#> ── Attaching core tidyverse packages ────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.4 ✔ readr 2.1.5
#> ✔ forcats 1.0.0 ✔ stringr 1.5.1
#> ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
#> ✔ purrr 1.0.2
#> ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force
#>   all conflicts to become errors
# Read the country-to-country flow table. Column types are guessed by
# readr; the message printed at render time is kept below as `#>` comments.
scopus_country_flows <- read_csv(
  "data_processed/scopus_2024_V1_scholarlymigration_countryflows_enriched.csv"
)
#> Rows: 92980 Columns: 16
#> ── Column specification ──────────────────────────────────────────────────
#> Delimiter: ","
#> chr (8): countrynamefrom, countrynameto, regionfrom, regionto, incomelevelfr...
#> dbl (8): n_migrations, year, gdp_per_capitafrom, gdp_per_capitato, populatio...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this
#>   message.
# Country boundaries bundled with the spData package.
countries <- spData::world

# Draw the boundaries as a quick visual check of the data.
ggplot(data = countries) +
  geom_sf()
# Country boundaries with one st_point_on_surface() point per country drawn
# on top in dark red. The warnings emitted at render time are kept as `#>`
# comments.
ggplot() +
  geom_sf(data = countries) +
  geom_sf(data = st_point_on_surface(countries), color = "darkred")
#> Warning: st_point_on_surface assumes attributes are constant over geometries
#> Warning in st_point_on_surface.sfc(st_geometry(x)): st_point_on_surface may not
#> give correct results for longitude/latitude data

# Build a plain lookup table with one centroid (lon/lat) per country.
countries_centroids <- countries |>
  st_centroid() |> # finds the center of each country's polygon
  st_coordinates() |> # extracts numeric coordinates from the POINT geometry
  as.data.frame() |> # converts the matrix to a data.frame
  setNames(c("lon", "lat")) |> # renames the columns
  cbind(countries) |> # adds back the columns from `countries`
  select(iso_a2, lon, lat, name_long) # keeps only the columns we really need
#> Warning: st_centroid assumes attributes are constant over geometries
# Inspect the centroid table; the output from render time is kept below.
glimpse(countries_centroids)
#> Rows: 177
#> Columns: 4
#> $ iso_a2 <chr> "FJ", "TZ", "EH", "CA", "US", "KZ", "UZ", "PG", "ID", "AR", …
#> $ lon <dbl> 178.56842, 34.74198, -12.18574, -96.39551, -103.57290, 67.23…
#> $ lat <dbl> -17.3156217, -6.2505643, 24.2783849, 60.4767578, 44.7559814,…
#> $ name_long <chr> "Fiji", "Tanzania", "Western Sahara", "Canada", "United Stat…
library(countrycode)
# assuming your data.frame is called `scopus_country_flows`
# Add ISO 3166 alpha-2 codes for origin and destination so the flows can be
# matched to `countries_centroids`, which is keyed by `iso_a2`.
# countrycode() returns NA (with a warning) for codes it cannot convert.
scopus_country_flows <- scopus_country_flows |>
  mutate(
    iso2codefrom = countrycode(
      iso3codefrom,
      origin = "iso3c",
      destination = "iso2c"
    ),
    iso2codeto = countrycode(
      iso3codeto,
      origin = "iso3c",
      destination = "iso2c"
    )
  )

# Attach every outgoing flow to its origin country's centroid.
# `na_matches = "never"` stops countries that lack an ISO2 code from being
# paired with flows whose origin code could not be converted (NA == NA).
centroids_flows <- countries_centroids |>
  left_join(
    scopus_country_flows,
    join_by(iso_a2 == iso2codefrom),
    na_matches = "never"
  )
# flowmapblue() expects
#   locations: one row per place with columns id, name, lat, lon
#   flows:     one row per link with columns origin, dest, count
# so both tables are reshaped from `centroids_flows` before the call.

# One row per country that has a usable ISO2 id.
flowmap_locations <- centroids_flows |>
  filter(!is.na(iso_a2)) |>
  select(id = iso_a2, name = name_long, lat, lon) |>
  distinct(id, .keep_all = TRUE)

# One row per origin-destination pair. Flows whose destination has no
# centroid in `flowmap_locations` cannot be drawn and are dropped.
# `n_migrations` is summed over all remaining rows of a pair (the input has
# a `year` column), i.e. the map shows totals rather than a single year.
flowmap_flows <- centroids_flows |>
  select(origin = iso_a2, dest = iso2codeto, count = n_migrations) |>
  filter(
    !is.na(origin),
    !is.na(count),
    dest %in% flowmap_locations$id
  ) |>
  summarise(count = sum(count), .by = c(origin, dest))

flowmap <- flowmapblue(
  locations = flowmap_locations,
  flows = flowmap_flows,
  # Mapbox token is read from the environment; Sys.getenv() yields "" if the
  # MAPBOX_TOKEN variable is not set.
  mapboxAccessToken = Sys.getenv("MAPBOX_TOKEN"),
  darkMode = TRUE,
  animation = FALSE,
  clustering = TRUE
)
flowmap